1
2 /*
3 * SmartCrawler
4 *
5 * $Id: QuickTest.java,v 1.9 2005/08/05 15:55:53 vincool Exp $
6 * Copyright 2005 Davide Pozza
7 *
8 * This program is free software; you can redistribute it
9 * and/or modify it under the terms of the GNU General Public
10 * License as published by the Free Software Foundation;
11 * either version 2 of the License, or (at your option) any
12 * later version.
13 *
14 * This program is distributed in the hope that it will be
15 * useful, but WITHOUT ANY WARRANTY; without even the implied
16 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17 * PURPOSE. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the Free
22 * Software Foundation, Inc., 59 Temple Place, Suite 330,
23 * Boston, MA 02111-1307 USA
24 *
25 */
26
27 package org.smartcrawler.examples;
28
29 import org.smartcrawler.common.MalformedLinkException;
30 import org.smartcrawler.*;
31
32 /***
33 * The engine thread which is started by the {@link org.smartcrawler.Crawler}
34 *
35 * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
36 * @version <tt>$Revision: 1.9 $</tt>
37 */
38 public class QuickTest {
39
40 /***
41 * Creates a new instance of QuickTest
42 */
43 public QuickTest() {
44
45 }
46 /***
47 * The main method
48 *
49 * @param args the command line arguments
50 */
51 public static void main(String[] args) {
52 String urlStr = null;
53 String configFileName = null;
54
55 //only for test purpose
56 //urlStr = "http://www.alice.it";
57 //configFileName = "src/bin/conf/smartcrawler-config.xml";
58
59 urlStr = "http://images.google.it/images?q=casa&hl=it";
60 configFileName = "examples/googleImages/conf/google_images-config.xml";
61 System.setProperty("extractionPatterns.file.path", "examples/googleImages/conf/extractPatterns.xml");
62 //urlStr = "http://www.nytimes.com";
63 //configFileName = "src/bin/conf/nyt_rss-config.xml";
64
65 //configFileName = "src/bin/conf/yellowPages-config.xml";
66 //urlStr = "http://www.paginegialle.it/pg/cgi/pgsearch.cgi?btt=1&ts=1&l=1&cb=0&ind=&nc=&qs=albergo&dv=vicenza&x=0&y=0";
67 //urlStr = "http://pgd.paginegialle.it/66/ct=66&cc=337100290&cl=1&iq=000212487235030529042997&cb=0";
68
69 //urlStr = "http://www.photosig.com/go/photos/browse?sort=id-d&page=1&id=1";
70 urlStr = "http://www.photosig.com/go/photos/view;jsessionid=5CACE7874611EFEBE567706E1565D291?id=1577721&forward=browse";
71 configFileName = "examples/photosig/conf/photosig-config.xml";
72 System.setProperty("extractionPatterns.file.path", "examples/photosig/conf/extractPatterns.xml");
73 try {
74
75 new Crawler(urlStr, configFileName).startEngines();
76
77 } catch (MalformedLinkException e){
78 System.out.println("Invalid initial link! " + urlStr);
79 } catch (Exception e){
80 System.out.println("Generic error");
81 e.printStackTrace();
82 }
83 }
84 }